
/*************************************************************************

Author:		Jacobus Cilliers

Overview:	Replication code for the following paper. 

Stern, Cilliers, Jukes, Fleisch,  Taylor and  Mohohlwane. "Persistence and Emergence of Literacy Skills: Long-Term Impacts of an Effective Early Grade Reading Intervention in South Africa".  Journal of Research on Education Effectiveness. 2024


- Panel data, each observation is a different student. 
- Variables starting with BL_l: baseline data collection (when non-repeating students were in grade 2)
- Variables starting with W4_: fourth wave of data data collection (when non-repeating students were in grade 4)
- Variables starting with W5_: fifth wave of data data collection (when non-repeating students were in grade 7)


Created: 	September 25 2024

***************************************************************************/
clear
set more off


**Install packages (if necessary) **
/*ssc install outreg2
ssc install qreg2
ssc install coefplot
ssc install blindschemes
*/
/************************************************************************
SET UP: GLOBALS (FILE STRUCTURE, CONTROL VARIABLES, AND COLOR SCHEMES)
*************************************************************************/
*Define your own global here. 
global root "..." 
	
global fig "$root/Output Files/Figures"
global tables "$root/Output Files/Tables"
global data "$root/Data"

*Controls: Baseline scores, prior demographic controls, wave 5 assets
global BL_controls BL_score_A BL_score_B BL_score_C BL_score_D BL_score_E BL_score_F_compr BL_score_Fwords
global controls female_W2 district ANA_new P_ed2-P_ed6
global community_controls C2011_SP_wealth_index C2011_attendance
global w5_controls student_asset W5_b2_books


*Color schemes
global richblack 33 39 33 
global medblue 0 103 185
global darkgray 108 100 99
global darkred 101 29 50
global usaidred 186 12 47 
global usaidblue 0 47 108

*Define color scheme
set scheme plotplain


/************************************************************************
ANALYSIS
*************************************************************************/


***************
*** Table 1	***
***************
*Manually entered. No analysis


***************
*** Table 2	***
***************

use "$data/EGRS_wave7_for_replication", clear  


*Run regressions and output them 
	qui reg W5_attrite treatment i.strata, robust cl(W2_idschool)
		qui sum W5_attrite if treatment == 0
		local m = `r(mean)'
		eststo attrite, addscalars(m `m')
		outreg2 using "$tables/student_level_attrition.xls", replace keep(treatment) label dec(2) nocons nonotes nodepvars adds(Mean attrition, `m')

	foreach var of varlist age_best_W1and2 female_W2 BL_totscore_SD {
		qui reg `var' treatment W5_attrite W5_attrite_x_treat i.strata, robust  cl(W2_idschool)
		outreg2 using "$tables/student_level_attrition.xls", append keep( treatment W5_attrite W5_attrite_x_treat ) label dec(2) nocons nonotes nodepvars
	}
		
		
****************
*** Table 3. ***
****************

*Run regressions and output to a table
cap cap erase "$tables/Table3.xls"
cap erase "$tables/Table3.txt"
	foreach var of varlist W5_g7hl_orf2_3_pm W5_g7hl_wc_score_pcnt W5_g7efal_tc_comp_sp ontrack {
		qui reg `var' treatment $BL_controls $controls $community_controls $w5_controls i.strata, cl(W2_idschool)		
		qui sum `var' if treat == 0 
		local e `r(mean)'
		outreg2 using "$tables/Table3.xls", append keep(treatment) label nocons nonotes adds(Control mean, `e') dec(1)
	}
	
*Calculate the standardized effect sizes (added manually to the bottom of the table)	
	foreach var of varlist W5_g7hl_orf2_3_pm W5_g7hl_wc_score_pcnt W5_g7efal_tc_comp_sp {
		reg Z_`var' treatment $BL_controls $controls $community_controls $w5_controls i.strata, cl(W2_idschool)
		}	


***************
*** Table 4 ***
***************

*Drop the standardized scores, because I need to recreate them with the new sample of students assessed across both waves.
drop Z_*

cap cap erase "$tables/table4.xls"
cap erase "$tables/table4.txt"

*Regressions for Setswana ORF
foreach var of varlist 	gr4_la_tsk_5_tot W5_g7hl_orf2_3_pm   {
	qui reg `var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
	if gr4_la_tsk_5_tot ~= . & W5_g7hl_orf2_3_pm~= ., cl(W2_idschool)
	qui sum `var' if treat == 0 & gr4_la_tsk_5_tot ~= . & W5_g7hl_orf2_3_pm~= .
	local e `r(mean)'
	outreg2 using "$tables/table4.xls", append ///
	keep(treatment) label nocons nonotes adds(Control mean, `e') bdec(1) sdec(1)
	**Also show effect sizes in terms of SDs (added manually to bottom row of the table)
	qui sum `var' if treat == 0 & (gr4_la_tsk_5_tot ~= . & W5_g7hl_orf2_3_pm ~= .)
	qui gen Z_`var' = (`var' - `r(mean)')/`r(sd)'	
	reg Z_`var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
	if gr4_la_tsk_5_tot ~= . & W5_g7hl_orf2_3_pm~= ., cl(W2_idschool)
	
}

*Regressions for Setswana Written comprehension
foreach var of varlist  gr4_wa_home_mean 	W5_g7hl_wc_score_pcnt	{ 
	qui reg `var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
			if (gr4_wa_home_mean ~= . & W5_g7hl_wc_score_pcnt ~= .), cl(W2_idschool)
	
	qui sum `var' if treat == 0 & (gr4_wa_home_mean ~= . & W5_g7hl_wc_score_pcnt ~= .)
	local e `r(mean)'
	outreg2 using "$tables/table4.xls", append ///
	keep(treatment) label nocons nonotes adds(Control mean, `e') bdec(1) sdec(1)
	*Also show effect sizes in terms of SDs (added manually to bottom row of the table)
	qui sum `var' if treat == 0 & (gr4_wa_home_mean ~= . & W5_g7hl_wc_score_pcnt ~= .)
	qui gen Z_`var' = (`var' - `r(mean)')/`r(sd)'	
	reg Z_`var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
	if gr4_wa_home_mean ~= . & W5_g7hl_wc_score_pcnt~= ., cl(W2_idschool)

}

*Regressions for English Written comprehension
foreach var of varlist  gr4_wa_engl_mean W5_g7efal_tc_comp_sp { 
	qui  reg `var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
		if (gr4_wa_engl_mean ~= . & W5_g7efal_tc_comp_sp ~= .)
	qui sum `var' if treat == 0 & (gr4_wa_engl_mean ~= . & W5_g7efal_tc_comp_sp ~= .)
	local e `r(mean)'
	outreg2 using "$tables/table4.xls", append ///
	keep(treatment) label nocons nonotes adds(Control mean, `e')  bdec(1) sdec(1)
	*Also show effect sizes in terms of SDs (added manually to bottom row of the table)
	qui sum `var' if treat == 0 & (gr4_wa_engl_mean ~= . & W5_g7efal_tc_comp_sp ~= .)
	qui gen Z_`var' = (`var' - `r(mean)')/`r(sd)'	
	reg Z_`var' treatment $BL_controls $controls $community_controls $w5_controls i.strata ///
	if gr4_wa_engl_mean ~= . & W5_g7efal_tc_comp_sp~= ., cl(W2_idschool)
}


*******************
*** Figure 1	***
*******************

use "$data/EGRS_wave7_for_replication", clear  


		gen W5_student_attrite = W5_attrite == 1 & drop_flag == 0
		
		graph bar drop_flag W5_student_attrite W5_gr4 W5_gr5 W5_gr6 W5_gr7, percentage ///
				over(treatment, label(angle(0))) stack ///
				ylabel(0(20)100) ///
				ytitle("Percentage of students") ///
				legend( position(6) size(small) region(lcolor(white)) rows(2) order (6 "Grade 7"  5 "Grade 6" 4 "Grade 5"  3 "Grade 4" 2 "Attrited (student)" 1 "Attrited (school)")) ///
				blabel(bar, position(center) format(%9.1f) size(small) color(white)) ///
				bar(2, color($usaidred)) bar(3, color($usaidblue)) bar(4, color($medblue)) bar(5, color(gs12)) ///
				bar(6, color($darkgray)) graphregion(color(white))
			graph export "$fig/Figure1(b).png", replace
			

		graph bar W4_attrite W4_gr2 W4_gr3 W4_gr4, percentage ///
				over(treatment, label(angle(0))) stack ///
				ylabel(0(20)100) ///
				ytitle("Percentage of students") ///
				legend( position(6) size(small) region(lcolor(white)) rows(2) order (4 "Grade 4" 3 "Grade 3"  2 "Grade 2" 1 "Attrited")) ///
				blabel(bar, position(center) format(%9.1f) size(small) color(white)) ///
				bar(1, color($usaidred))  bar(2, color($medblue)) bar(3, color(gs12)) ///
				bar(4, color($darkgray)) graphregion(color(white))
			graph export "$fig/Figure1(a).png", replace
		
		
		
*******************
*** Figure 2	***
*******************
	local i = 1
	foreach var of varlist W5_g7hl_orf2_3_pm W5_g7hl_wc_score_pcnt W5_g7efal_tc_comp_sp {
		histogram `var'
		graph export "$fig/Figure2(`i').png", replace
		local i = `i' + 1
	}
****************
*** Figure 3 ***
****************

*Run the quantile regressions and export as a .png file

local i = 1
foreach var of varlist  W5_g7hl_orf2_3_pm W5_g7hl_wc_score_pcnt W5_g7efal_tc_comp_sp {
	* Define regression options
	local quantileInterval = 0.1
	local statSignLevel    = 90	
	*loop on the variables
	* Initiate locals (to be filled in the next loop)
	local quantileLabels   = ""		//x-axis label
	local modelsList 	   = ""		//estimates name
	local quantileCount    = 1		//count of quantile regression
	* Loop on quantiles
	forv  quantile 		   = `quantileInterval'(`quantileInterval')`=1-`quantileInterval'' {
			
		local    roundQuantile =  round(real(string(`quantile'*100, "%4.0f")),10)/100 //this is needed, otherwise Stata gives issue with the last numbers of the sequence and with rounding
		local  integerQuantile = `roundQuantile'*100
			
		* Store estimates from quantile regression with fixed effects and clustered standard errors
		eststo est_q`integerQuantile': 									///
		   qui qreg2 `var' treatment ///
			 , q(`roundQuantile') cl(W2_idschool)
			
		* Add these estimate to locals
		local  quantileLabels `"`quantileLabels' `quantileCount++' "`integerQuantile'"	"'
		local  modelsList 	  `"`modelsList' 						  est_q`integerQuantile' || "'
	}
		coefplot `modelsList', keep(treatment) ///
			vertical bycoefs msymbol(diamond) mcolor(black) 	///
			levels(`statSignLevel') recast(line) lwidth(*2)	lcolor(black) ciopts(recast(rline) lcolor(black*0.75) lpattern(dash))				///
			   title("")  ytitle("Standard deviations") ///
			   yline(0, lstyle(foreground)) xlab(none) xlab(`quantileLabels', add)	legend(off) graphregion(color(white))
		graph export "$fig/Figure3_`i'.png", replace 
		local i = `i' + 1
	}


		